####################################
#Diff-in-diff with matching and dual-distance cutoffs - NJ figure, dd with dd 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

########################
#Initial setup 
########################
#set wd
try(setwd("~/Dropbox/Collaborations/Traffic/project_analysis"), TRUE)
try(setwd("~/Dropbox/Traffic/project_analysis"), TRUE)

#external libraries 
library(ggplot2)

##############################
#set variable values
##############################

#load in data 
NJ_data <- read.csv("NJ_precinct_master.csv", header=T)
#PA_data <- read.csv("PA_precinct_master.csv", header=T)

#set general specifications for later modifications
my_data <- NJ_data 

#outcomes 
outcome_time1_name <- "USP_00"
outcome_time2_name <- "USP_04"  #US president (dem) percent 2000, 2004

#identify matching variables 
#matching_variables <- c("cepctblack", "cnipoverty", "cnpopurban") #income, commute length, number of cars, etc.
matching_variables <- c("census_2000_pblack", "cnipoverty")#"census_2000_200over")
my_data <- my_data[!rowSums(is.na(my_data[,matching_variables])), ]#drop obs with missingness in control vars 

#treated basis (something having to do with E-ZPasses)
#treatment_basis_name <- "EZDist_PA_NEAR_DIST" 
#treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_PA_Total_Time"
treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_NJ_Total_Time"

#control basis (something having to do with non-E-ZPass highway)
#control_basis_name <- "minDist_nonEZ_hwy"
control_basis_name <- "minNetworkOptimizedDistance_Distance_nonEZ_hwy"

#define cutoffs 
##############################
#loop to generate figure 
##############################
max_length_out <- 10; min_length_out <- 5;
#max_seq <- seq(250, 3000, length.out = max_length_out)#crow
#min_seq <- seq(250, 38000, length.out = min_length_out)#crow

##giver of good results 
#max_seq <- seq(5, 14, length.out = max_length_out)
#min_seq <- seq(14, 36.7, length.out = min_length_out)
max_seq <- seq(2, 7, length.out = max_length_out)
min_seq <- seq(7, 14, length.out = min_length_out)

##test
#max_seq <- seq(2, 60, length.out = max_length_out)
#min_seq <- seq(15, 60, length.out = min_length_out)


BIG_BALANCE_LIST <- list()
BIG_STORAGE_LIST <- list()
source("./causal_effect_estimation/dd_with_matchBoot_function.R"); counter1 <- 0 
for(j in min_seq)
{ 
  counter1 <- counter1 + 1 
  results_list <- list(); balance_list <- list()
  counter2 <- 0; for(i in max_seq)
  { 
    counter2 <- counter2 + 1
    print(counter2)
    RESULT <- dd_with_dd(my_data=my_data,
                         treatment_basis_name=treatment_basis_name, 
                         control_basis_name=control_basis_name, 
                         outcome_time1_name=outcome_time1_name, 
                         outcome_time2_name=outcome_time2_name,
                         matching_variables=matching_variables,
                         treatment_basis_treated_max=i, 
                         treatment_basis_control_min=j, 
                         control_basis_control_max=i,
                         control_basis_treated_min=j, 
                         boostrap_SDs=T, 
                         bootstrap_numb=100, 
                         conf_level=0.999, 
                         my_method = "nearest", 
                         my_replace=T, 
                         start_browser=T)
    results_list[[counter2]] <- unlist(RESULT[[1]])
    balance_list[[counter2]] <- RESULT[[2]]
  }
  BIG_STORAGE_LIST[[counter1]] <- results_list
  BIG_BALANCE_LIST[[counter1]] <- balance_list
}


####################
#save and plot results
####################
#save results as appropriate objects 
results_list <- unlist(BIG_STORAGE_LIST, recursive=F, use.names=T)
index_seq <- seq(1, max_length_out*min_length_out)
boot_used <- T
#match_name <- "nearest_crowflies"
match_name <- "nearest_NetDistMinDist_NJ"
#match_name <- "nearest_NetDistMinTime"
for(index in 1:min_length_out)
{
  start <- max_length_out*(index-1)+1
  end <- max_length_out*(index)
  
  internal_list <- results_list[start:end]
  
  unlisted_results <- unlist(internal_list, recursive=F, use.names=F) 
  base_results <- unlist(unlisted_results[(1:length(unlisted_results))%%4==1])
  upper <- unlist(unlisted_results[(1:length(unlisted_results))%%4==2])
  lower <- unlist(unlisted_results[(1:length(unlisted_results))%%4==3])
  match_numbs <- unlist(unlisted_results[(1:length(unlisted_results))%%4==0])
  
  gg_data <- data.frame(cbind(max_seq, base_results, lower, upper, match_numbs))
  
  if(boot_used==T)
  { 
    g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
      geom_point() + 
      geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
      geom_hline(yintercept = 0, line_type="dashed") + 
      geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
      ylab("DiD effect (in % points)") + 
      xlab("Maximum distance from E-ZPass or Control Highway") + 
      ggtitle("Maximum Distance vs. DiD Effect (in % points)") + 
      theme(text = element_text(size=13)) 
    ggsave(g, file=paste(match_name, sprintf("%i.png", index), sep=""))
  }
  else
  { 
    g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
      geom_point() + 
      geom_hline(yintercept = 0, line_type="dashed") + 
      ylab("DiD effect (in % points)") + 
      xlab("Maximum distance from E-ZPass or Control Highway") + 
      ggtitle("Maximum Distance vs. DiD Effect (in % points)") + 
      theme(text = element_text(size=13)) 
    ggsave(g, file=paste(match_name, sprintf("%i.png", index), sep=""))
  }
}


export_match_matrix <- F
#for extracting match matrix
if(export_match_matrix==T)
{ 
match_matrix <- read.csv("match_matrix_NJ.csv")
treatment <- as.numeric(as.character(match_matrix$treated_units))
control <- as.numeric(as.character(match_matrix$control_units))

treatment_df <- cbind(my_data[treatment, c("GEOID10", "INTPTLAT_1","INTPTLON_1")], 1)
control_df <- cbind(my_data[control, c("GEOID10", "INTPTLAT_1","INTPTLON_1")], 0)
colnames(treatment_df)[4] <- "treatment.status"
colnames(control_df)[4] <- "treatment.status"

export <- cbind(treatment_df, control_df)
colnames(export) <- c("T_GEOID10", "T_INTPTLAT_1", "T_INTPTLON_1", "T_treatment.status", 
                      "C_GEOID10", "C_INTPTLAT_1", "C_INTPTLON_1", "C_treatment.status")

write.csv(export,file="matched_precincts_NJ_better_criteria.csv")
}